Feature Extraction
In [2]:
import pandas as pd

###
### Load Data Set
###
df = pd.read_csv(
    'cs-training.csv',
    sep=',',
    header=0)

# The first CSV column is dropped by position
# (presumably an exported row index -- verify against the file).
data = df.drop(df.columns[0], axis=1)

# Drop rows with missing column data
data = data.dropna()

###
### Convert Data Into List Of Dict Records
###
data = data.to_dict(orient='records')

###
### Separate Target and Outcome Features
###
from sklearn.feature_extraction import DictVectorizer
from pandas import DataFrame

# DictVectorizer sorts feature names by default (sort=True), so the resulting
# columns come out in alphabetical order. The prediction API further down
# builds its single-row input in that same order.
vec = DictVectorizer()
df_data = vec.fit_transform(data).toarray()

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() and fall back only on older installations.
if hasattr(vec, 'get_feature_names_out'):
    feature_names = list(vec.get_feature_names_out())
else:
    feature_names = vec.get_feature_names()

df_data = DataFrame(df_data, columns=feature_names)

# 'SeriousDlqin2yrs' is the label to predict; every other column is an input.
outcome_feature = df_data['SeriousDlqin2yrs']
target_features = df_data.drop('SeriousDlqin2yrs', axis=1)
Generate Training and Test Sets
In [3]:
###
### Generate Training and Testing Set
###
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; train_test_split now lives in sklearn.model_selection. The fallback
# keeps the notebook runnable on installations older than 0.18.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# X_1: independent (target) variables for the first data set
# Y_1: dependent (outcome) variable for the first data set
# X_2: independent (target) variables for the second data set
# Y_2: dependent (outcome) variable for the second data set
#
# The split is 50/50 (test_size=0.5) and seeded (random_state=0) so that it is
# reproducible across runs.
X_1, X_2, Y_1, Y_2 = train_test_split(
    target_features, outcome_feature, test_size=0.5, random_state=0)
Define Classifier
In [5]:
###
### Define Classifier
###
from sklearn.naive_bayes import GaussianNB
# Gaussian Naive Bayes with default settings; it is fitted in the next cell.
clf = GaussianNB()
Train Classifier
In [6]:
###
### Train Classifier
###
# Fit on the first half of the split (X_1 features, Y_1 labels);
# X_2 / Y_2 are kept aside for evaluation.
clf.fit(X_1,Y_1)
Out[6]:
Print Accuracy and Confusion Matrix
In [11]:
###
### Print Accuracy and Confusion Matrix
###
from sklearn.metrics import confusion_matrix

# Predictions for the held-out half of the data.
output = clf.predict(X_2)

# confusion_matrix expects (y_true, y_pred). With the true labels first,
# rows are the actual classes and columns are the predicted classes;
# passing them the other way round yields the transposed matrix.
matrix = confusion_matrix(Y_2, output)

# Mean accuracy of the classifier on the held-out data.
score = clf.score(X_2, Y_2)
In [15]:
# `score` is already a single accuracy value, so it is printed as-is;
# calling .mean() on it is redundant and fails when it is a plain Python float.
print("accuracy: {0}".format(score))
In [16]:
# Show the confusion matrix computed in the evaluation cell.
print(matrix)
Save Classifier (A folder called model must be created first)
In [18]:
###
### Save Classifier
###
import os

# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23; use the standalone joblib package when it is available.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# joblib.dump does not create missing directories, so make sure the output
# folder exists instead of relying on it having been created by hand.
if not os.path.isdir('model'):
    os.makedirs('model')

joblib.dump(clf, 'model/nb.pkl')
Out[18]:
Ignore Warnings
In [20]:
import warnings

# flask.exthook (and with it ExtDeprecationWarning) only exists in Flask
# releases that still ship the deprecated ``flask.ext`` redirect; it was
# removed in Flask 1.0. Guard the import so this cell does not fail there --
# without the module there is no such warning to silence anyway.
try:
    from flask.exthook import ExtDeprecationWarning
except ImportError:
    ExtDeprecationWarning = None

if ExtDeprecationWarning is not None:
    warnings.simplefilter('ignore', ExtDeprecationWarning)
Setup Flask
In [21]:
from flask import Flask
# Import the extension by its real package name: the ``flask.ext.*`` redirect
# is deprecated and was removed in Flask 1.0.
from flask_restplus import Api
from flask_restplus import fields

# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23; use the standalone joblib package when it is available.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Flask application plus the REST API wrapper that serves the endpoints.
app = Flask(__name__)
api = Api(
    app,
    version='1.0',
    title='Credit API',
    description='A simple Prediction API')

# All credit-approval routes are registered under this namespace.
ns = api.namespace(
    'approve_credit',
    description='Approve Credit Operations')
Setup parser
In [22]:
# (argument name, help text) for every form field of the prediction request.
# All fields are required floats sent as form data, so only the name and the
# description vary. The help texts of the 60-89 days and dependents fields
# used to repeat the real-estate description; they now describe their own field.
FORM_FIELDS = [
    ('RevolvingUtilizationOfUnsecuredLines',
     'Total balance on credit cards and personal lines of credit except real estate and no installment debt like car loans divided by the sum of credit limits'),
    ('age',
     'Age of borrower in years'),
    ('NumberOfTime30-59DaysPastDueNotWorse',
     'Number of times borrower has been 30-59 days past due but no worse in the last 2 years.'),
    ('DebtRatio',
     'Monthly debt payments, alimony,living costs divided by monthly gross income'),
    ('MonthlyIncome',
     'Monthly income'),
    ('NumberOfOpenCreditLinesAndLoans',
     'Number of Open loans (installment like car loan or mortgage) and Lines of credit (e.g. credit cards)'),
    ('NumberOfTimes90DaysLate',
     'Number of times borrower has been 90 days or more past due.'),
    ('NumberRealEstateLoansOrLines',
     'Number of mortgage and real estate loans including home equity lines of credit'),
    ('NumberOfTime60-89DaysPastDueNotWorse',
     'Number of times borrower has been 60-89 days past due but no worse in the last 2 years.'),
    ('NumberOfDependents',
     'Number of dependents in family excluding themselves (spouse, children etc.)'),
]

# Request parser shared by the API documentation and the POST handler.
parser = api.parser()
for field_name, field_help in FORM_FIELDS:
    parser.add_argument(
        field_name,
        type=float,
        required=True,
        help=field_help,
        location='form')
Out[22]:
Setup Web API
In [23]:
# Marshalling schema for the endpoint's JSON response: a single string
# field named 'result'.
resource_fields = api.model('Resource', {'result': fields.String})
# Import the extension by its real package name: the ``flask.ext.*`` redirect
# is deprecated and was removed in Flask 1.0.
from flask_restplus import Resource


@ns.route('/')
class CreditApi(Resource):
    """REST resource that scores a credit application with the saved classifier."""

    # Feature columns in the order the classifier input is built. This is the
    # alphabetical order produced by the DictVectorizer in the training cell,
    # with the label column removed.
    FEATURE_ORDER = (
        "DebtRatio",
        "MonthlyIncome",
        "NumberOfDependents",
        "NumberOfOpenCreditLinesAndLoans",
        "NumberOfTime30-59DaysPastDueNotWorse",
        "NumberOfTime60-89DaysPastDueNotWorse",
        "NumberOfTimes90DaysLate",
        "NumberRealEstateLoansOrLines",
        "RevolvingUtilizationOfUnsecuredLines",
        "age",
    )

    # Location of the classifier written by the "Save Classifier" cell.
    MODEL_PATH = 'model/nb.pkl'

    @api.doc(parser=parser)
    @api.marshal_with(resource_fields)
    def post(self):
        """Parse the form fields and return the approval decision.

        Returns:
            A ``(payload, status)`` tuple where ``payload`` is
            ``{"result": "approve" | "deny"}``.
        """
        args = parser.parse_args()
        result = self.get_result(args)
        # NOTE(review): 201 (Created) is returned although nothing is created;
        # kept unchanged so existing clients see the same status code.
        return result, 201

    def get_result(self, args):
        """Run the saved classifier on one set of applicant features.

        Args:
            args: Mapping from each name in ``FEATURE_ORDER`` to its numeric value.

        Returns:
            ``{"result": "deny"}`` when the classifier predicts class 1.0,
            otherwise ``{"result": "approve"}``.
        """
        from pandas import DataFrame

        # One row, with labelled columns in training order, so the input
        # lines up with what the model was fitted on.
        row = [args[name] for name in self.FEATURE_ORDER]
        features = DataFrame([row], columns=list(self.FEATURE_ORDER))

        clf = joblib.load(self.MODEL_PATH)
        prediction = clf.predict(features)[0]

        # Class 1.0 is the 'SeriousDlqin2yrs' label, so the application is denied.
        if prediction == 1.0:
            decision = "deny"
        else:
            decision = "approve"
        return {
            "result": decision
        }
if __name__ == '__main__':
    # debug=True also switches on Werkzeug's auto-reloader, which restarts the
    # process by re-running the launching command. That does not work from
    # inside a notebook kernel, so the reloader is turned off explicitly.
    # NOTE(review): debug mode exposes the interactive debugger, which can
    # execute arbitrary code -- only run this on a trusted local machine.
    app.run(debug=True, use_reloader=False)
In [ ]: